# Load the four saved collection rounds (time1.RData ... time4.RData).
# Each file is expected to define an object called `all.data`; the first three
# are copied to all.data1..all.data3 and the fourth stays in `all.data`.
#
# The workspace is no longer wiped with rm(list = ls()) and the working
# directory is no longer changed: files are addressed through `data_dir`.
data_dir <- "/Users/Joan/Google Drive/Article Twitter Streams and Removal Rates/Data"
library(plyr)

# load() returns the names of the objects it created. Checking for "all.data"
# prevents silently re-using the object left over from the previous file when
# a file does not contain it.
loaded <- load(file.path(data_dir, "time1.RData"))
stopifnot("all.data" %in% loaded)
all.data1 <- all.data

loaded <- load(file.path(data_dir, "time2.RData"))
stopifnot("all.data" %in% loaded)
all.data2 <- all.data

loaded <- load(file.path(data_dir, "time3.RData"))
stopifnot("all.data" %in% loaded)
all.data3 <- all.data

loaded <- load(file.path(data_dir, "time4.RData"))
stopifnot("all.data" %in% loaded)
# Flatten one loaded list into a data frame and tag every row with its block.
#
# records:   list passed to plyr::ldply(records, data.frame).
# block:     block number appended to the value in column 1; the result is
#            stored in column 10.
# drop_cols: optional column positions removed before tagging.
#
# Returns the flattened data frame with the tag in column 10. The assignment
# is vectorised, so an empty input no longer gets a fabricated row.
label_block <- function(records, block, drop_cols = NULL) {
  flat <- ldply(records, data.frame)
  if (!is.null(drop_cols)) {
    flat <- flat[, -drop_cols]
  }
  flat[, 10] <- paste0(flat[, 1], block)
  flat
}

# NOTE(review): the object-to-block mapping below is kept exactly as it was:
# `all.data` (last loaded, i.e. time4.RData) is tagged 1, all.data1 is tagged 2,
# all.data2 is tagged 3 and all.data3 is tagged 4. This looks shifted by one
# relative to the file names -- confirm that it is intended.
time1 <- rbind(
  label_block(all.data, 1),
  label_block(all.data1, 2, drop_cols = 10:12),
  label_block(all.data2, 3),
  label_block(all.data3, 4)
)

# Column 11: column 1 with its last character removed.
time1[, 11] <- substr(time1[, 1], 1, nchar(time1[, 1]) - 1)
colnames(time1)[10] <- "take"
colnames(time1)[11] <- "group"

# Appends one row that is NA everywhere except take = "pablo iglesias32".
# NOTE(review): presumably a placeholder so that this take shows up in
# unique(time1$take) even though no data exist for it -- confirm.
time1[nrow(time1) + 1, 10] <- "pablo iglesias32"
time1 <- time1[order(time1$take), ]


# Per-take overlap statistics between rows flagged is.fw and rows flagged
# is.bw. One cell is computed for every group x round (1-9) x block (1-4);
# the cell's rows are those whose `take` equals paste0(group, round, block).
#
# Results (one element per cell, in loop order):
#   total       number of rows in the cell
#   match.notid share of rows with is.bw == 1 and is.fw == 1
#   match.id    share of is.fw ids that also occur among the is.bw ids
#   onlyfw      is.fw ids not found among the is.bw ids, divided by total
#   onlybw      is.bw ids not found among the is.fw ids, divided by total
n_rounds <- 9L
n_blocks <- 4L
group_ids <- unique(na.omit(time1$group))
n_cells <- length(group_ids) * n_rounds * n_blocks

# Preallocate instead of growing the vectors with c() on every iteration.
total <- integer(n_cells)
match.notid <- numeric(n_cells)
match.id <- numeric(n_cells)
onlyfw <- numeric(n_cells)
onlybw <- numeric(n_cells)

cell <- 0L
for (id in group_ids) {
  for (j in seq_len(n_rounds)) {
    for (f in seq_len(n_blocks)) {
      cell <- cell + 1L

      # which() drops NA comparisons, matching what subset() did before.
      rows <- which(time1$take == paste0(id, j, f))
      n_rows <- length(rows)
      in_bw <- time1$is.bw[rows] == 1
      in_fw <- time1$is.fw[rows] == 1
      ids <- time1$id_str[rows]

      total[cell] <- n_rows
      # Rows where either flag is NA are not counted (na.rm = TRUE).
      match.notid[cell] <- sum(in_bw & in_fw, na.rm = TRUE) / n_rows

      # An NA flag yields an NA id here, exactly as the original indexing did.
      idbw <- ids[in_bw]
      idfw <- ids[in_fw]
      fw_in_bw <- idfw %in% idbw
      bw_in_fw <- idbw %in% idfw

      match.id[cell] <- mean(fw_in_bw) ## ACTUAL MATCHING PERCENTAGE

      # Unmatched counts over the cell size; 0 when everything matched
      # (this also covers an empty cell, avoiding 0 / 0).
      n_fw_only <- sum(!fw_in_bw)
      n_bw_only <- sum(!bw_in_fw)
      onlyfw[cell] <- if (n_fw_only > 0) n_fw_only / n_rows else 0
      onlybw[cell] <- if (n_bw_only > 0) n_bw_only / n_rows else 0
    }
  }
}


# Assemble the per-take summary table `time`.
#
# The statistics vectors are paired positionally with unique(time1$take), so
# they must have the same length; a mismatch would otherwise be recycled or
# misaligned silently.
# NOTE(review): this pairing also assumes the statistics were computed in the
# same order as unique(time1$take) -- confirm.
take_keys <- as.character(unique(time1$take))
stopifnot(length(take_keys) == length(total))

# Typed columns are built directly. The previous data.frame(cbind(...)) turned
# every number into text (a factor on R < 4.0) and parsed it back afterwards.
time <- data.frame(
  each = take_keys,
  total = as.numeric(total),
  match.notid = as.numeric(match.notid),
  match.id = as.numeric(match.id),
  onlyfw = as.numeric(onlyfw),
  onlybw = as.numeric(onlybw),
  stringsAsFactors = FALSE
)

# Keys derived from `each`:
#   .id   = `each` without its last character
#   group = `each` without its last two characters
#   take  = group followed by the last character of `each`
key_len <- nchar(time$each)
time$.id <- substr(time$each, 1, key_len - 1)
time$group <- substr(time$each, 1, key_len - 2)
time$take <- paste0(time$group, substr(time$each, key_len, key_len))

# order() leaves ties in their original order, so rows sharing a `take` keep
# their relative position.
time <- time[order(time$take), ]

# Output directory for the figures. Files are addressed with file.path()
# instead of changing the working directory.
fig_dir <- "/Users/Joan/Google Drive/Article Twitter Streams and Removal Rates/TeX"

# Draw one TIFF figure with a 3 x 2 grid of panels, one panel per group.
#
# summary_df: data frame with columns `group`, `take`, `total` and `column`.
# column:     name of the column plotted on the y-axis.
# file:       path of the TIFF file to write.
# y_label:    common y-axis label; also quoted inside the footnote.
# title:      figure title.
# legend_y:   named vector (mean, label, n) with the y positions of the three
#             in-panel legend entries.
# xlim:       optional x-axis limits; NULL lets plot() choose them.
#
# Each panel shows the group's values as points, dashed red lines at 9.5, 18.5
# and 27.5, and a cyan segment at the mean of the rows whose `take` equals
# paste0(group, block) for blocks 1-4. Returns `file` invisibly.
plot_take_panels <- function(summary_df, column, file, y_label, title,
                             legend_y, xlim = NULL) {
  tiff(file, width = 2124, height = 2324, pointsize = 12, res = 300)
  # Close the device even if a plotting call fails.
  on.exit(dev.off(), add = TRUE)

  par(mfrow = c(3, 2), oma = c(1, 2.2, 2, 0), mar = c(3.5, 3, 2, 1)) # B, L, U, R

  # Numeric tick labels at 1, 9, 18, 27 and 36; NA at every other tick.
  labelled <- c(1, 9, 18, 27, 36)
  tick_labels <- rep(NA_real_, 36)
  tick_labels[labelled] <- labelled

  # Horizontal extent of the four mean segments.
  seg_from <- c(-1.5, 9.525, 18.525, 27.5)
  seg_to <- c(9.475, 18.475, 27.5, 37.5)

  values <- summary_df[[column]]

  # The layout has six panels, so at most the first six groups are drawn;
  # with fewer groups no NA-named panels are produced any more.
  for (name in head(unique(summary_df$group), 6)) {
    in_group <- summary_df$group == name

    plot(values[in_group], xlim = xlim, ylim = c(0, 1), ylab = "", xaxt = "n",
         xlab = "", main = name, cex.main = 0.9, pch = 19)
    abline(v = c(9.5, 18.5, 27.5), col = "red", lty = 2)

    block_means <- vapply(
      1:4,
      function(block) mean(values[summary_df$take == paste0(name, block)]),
      numeric(1)
    )
    segments(seg_from, block_means, seg_to, block_means, col = "cyan", lwd = 2)

    # NOTE(review): `cex` is kept as in the original call; `cex.axis` may have
    # been what was intended -- confirm before changing.
    axis(1, at = seq(1, 36, by = 1), labels = tick_labels, cex = 0.2)

    legend(0, legend_y[["mean"]], c("Mean"), cex = 0.7, lty = 1,
           col = c("cyan"), bty = "n")
    legend(0, legend_y[["label"]], c("Avg. N:"), cex = 0.7, bty = "n")
    legend(0, legend_y[["n"]],
           c(round(mean(summary_df$total[in_group]), digits = 2)),
           cex = 0.7, bty = "n")
  }

  mtext(text = bquote(bold("Take")), side = 1, line = -1, outer = TRUE,
        cex = 0.8)
  mtext(text = bquote(bold(.(y_label))), side = 2, line = 0, outer = TRUE,
        cex = 0.8)
  mtext(bquote(italic(bold(.(title)))), outer = TRUE, font = 2, cex = 1)
  mtext(
    paste0("Note: 'Take' is common x-axis; '", y_label,
           "' is common y-axis. Dashed red lines represent the 4 blocks of 9 takes each."),
    outer = TRUE, side = 1, adj = 0, font = 3, cex = 0.55
  )

  invisible(file)
}

# Figure 1: match percentage per take.
# NOTE(review): unlike Figures 2 and 3 this figure sets no xlim; kept as is.
plot_take_panels(
  time, "match.id", file.path(fig_dir, "Fig1.tif"),
  y_label = "Percent Match",
  title = "Match Percent per Take",
  legend_y = c(mean = 0.1, label = 0.23, n = 0.17)
)

# Figure 2: share found only in the Stream API (column onlyfw).
plot_take_panels(
  time, "onlyfw", file.path(fig_dir, "Fig2.tif"),
  y_label = "Percent Only in Stream API",
  title = "Only in Stream API, Percent per Take",
  legend_y = c(mean = 0.88, label = 1, n = 0.95),
  xlim = c(0, 36)
)

# Figure 3: share found only in the Search API (column onlybw).
plot_take_panels(
  time, "onlybw", file.path(fig_dir, "Fig3.tif"),
  y_label = "Percent Only in Search API",
  title = "Only in Search API, Percent per Take",
  legend_y = c(mean = 0.88, label = 1, n = 0.95),
  xlim = c(0, 36)
)


# Open question: could all of this be shown in one figure, or is that too ambitious?